In [1]:
import pandas as pd
import datetime
import os
import plotly.express as px
In [2]:
# Highway vehicle detections

df_traffic_data_autobahn_total = None

# assign directory
directory = 'traffic data/autobahn data'
 
# iterate over files in
# that directory
for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(file_path):
        print(file_path)

        # Open File
        df_traffic_data_autobahn_year = pd.read_csv(file_path,  delimiter=';')

        df_traffic_data_autobahn_year['Datum'] = '20' + df_traffic_data_autobahn_year['Datum'].apply(str)

        df_traffic_data_autobahn_year['Datum'] = pd.to_datetime(df_traffic_data_autobahn_year['Datum'], format='%Y%m%d')

        df_traffic_data_autobahn_year['Autobahn_KFZ_Total'] = df_traffic_data_autobahn_year['KFZ_R1'] + df_traffic_data_autobahn_year['KFZ_R2']

        df_traffic_data_autobahn_year = df_traffic_data_autobahn_year[['Datum','Autobahn_KFZ_Total']]

        # Step 4: Group KFZ_R1 and KFZ_R2 values on date. 
        df_traffic_data_autobahn_year = df_traffic_data_autobahn_year.groupby('Datum').sum('Autobahn_KFZ_Total')

        if df_traffic_data_autobahn_total is None:
            df_traffic_data_autobahn_total = df_traffic_data_autobahn_year
        else:
            df_traffic_data_autobahn_total = pd.concat([df_traffic_data_autobahn_total, df_traffic_data_autobahn_year])
        

df_traffic_data_autobahn_total['Autobahn_KFZ_Total_Rolling_Average_7_Days'] = df_traffic_data_autobahn_total.rolling(7, min_periods=1).mean()
        
# Read File
df_traffic_data_autobahn_total
traffic data/autobahn data\2017_A_S.txt
traffic data/autobahn data\2018_A_S.txt
traffic data/autobahn data\2019_A_S.txt
traffic data/autobahn data\2020_A_S.txt
traffic data/autobahn data\2021_A_S.txt
Out[2]:
Autobahn_KFZ_Total Autobahn_KFZ_Total_Rolling_Average_7_Days
Datum
2017-01-01 26975627 2.697563e+07
2017-01-02 41560509 3.426807e+07
2017-01-03 39117647 3.588459e+07
2017-01-04 36828052 3.612046e+07
2017-01-05 39337185 3.676380e+07
... ... ...
2021-12-27 42884833 4.293691e+07
2021-12-28 41452599 4.184089e+07
2021-12-29 41882438 4.037421e+07
2021-12-30 43390035 3.904579e+07
2021-12-31 26765605 3.820287e+07

1826 rows × 2 columns

In [3]:
# Federal roads vehicle detections

df_traffic_data_bundesstrassen_total = None

# assign directory
directory = 'traffic data/bundesstrassen data'
 
# iterate over files in
# that directory
for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(file_path):
        print(file_path)

        # Open File
        df_traffic_data_bundesstrassen_year = pd.read_csv(file_path, delimiter=';', low_memory=False)

        df_traffic_data_bundesstrassen_year['Datum'] = '20' + df_traffic_data_bundesstrassen_year['Datum'].apply(str)

        df_traffic_data_bundesstrassen_year['Datum'] = pd.to_datetime(df_traffic_data_bundesstrassen_year['Datum'], format='%Y%m%d')

        df_traffic_data_bundesstrassen_year['Bundesstrassen_KFZ_Total'] = df_traffic_data_bundesstrassen_year['KFZ_R1'] + df_traffic_data_bundesstrassen_year['KFZ_R2']

        df_traffic_data_bundesstrassen_year = df_traffic_data_bundesstrassen_year[['Datum','Bundesstrassen_KFZ_Total']]

        # Step 4: Group KFZ_R1 and KFZ_R2 values on date. 
        df_traffic_data_bundesstrassen_year = df_traffic_data_bundesstrassen_year.groupby('Datum').sum('Bundesstrassen_KFZ_Total')

        if df_traffic_data_bundesstrassen_total is None:
            df_traffic_data_bundesstrassen_total = df_traffic_data_bundesstrassen_year
        else:
            df_traffic_data_bundesstrassen_total = pd.concat([df_traffic_data_bundesstrassen_total, df_traffic_data_bundesstrassen_year])
        

df_traffic_data_bundesstrassen_total['Bundesstrassen_KFZ_Total_Rolling_Average_7_Days'] = df_traffic_data_bundesstrassen_total.rolling(7, min_periods=1).mean()
        
# Read File
df_traffic_data_bundesstrassen_total
traffic data/bundesstrassen data\2017_B_S.txt
traffic data/bundesstrassen data\2018_B_S.txt
traffic data/bundesstrassen data\2019_B_S.txt
traffic data/bundesstrassen data\2020_B_S.txt
traffic data/bundesstrassen data\2021_B_S.txt
Out[3]:
Bundesstrassen_KFZ_Total Bundesstrassen_KFZ_Total_Rolling_Average_7_Days
Datum
2017-01-01 3886847 3.886847e+06
2017-01-02 7362853 5.624850e+06
2017-01-03 7740331 6.330010e+06
2017-01-04 7537968 6.632000e+06
2017-01-05 8039175 6.913435e+06
... ... ...
2021-12-27 7923371 7.957952e+06
2021-12-28 8050999 7.596847e+06
2021-12-29 8310961 7.272411e+06
2021-12-30 8555059 7.103476e+06
2021-12-31 5568421 7.044972e+06

1826 rows × 2 columns

In [4]:
# Plot and check highway vehicle detections data
fig = px.line(df_traffic_data_autobahn_total, x=df_traffic_data_autobahn_total.index, y='Autobahn_KFZ_Total', markers = True)

fig.show()
In [5]:
# Plot and check highway vehicle detections smoothed moving average 7 days data
fig = px.line(df_traffic_data_autobahn_total, x=df_traffic_data_autobahn_total.index, y='Autobahn_KFZ_Total_Rolling_Average_7_Days', markers = True)

fig.show()
In [6]:
# Plot and check federal roads vehicle detections data
fig = px.line(df_traffic_data_bundesstrassen_total, x=df_traffic_data_bundesstrassen_total.index, y='Bundesstrassen_KFZ_Total', markers = True)

fig.show()
In [7]:
# Plot and check federal roads vehicle detections smoothed moving average 7 days data
fig = px.line(df_traffic_data_bundesstrassen_total, x=df_traffic_data_bundesstrassen_total.index, y='Bundesstrassen_KFZ_Total_Rolling_Average_7_Days', markers = True)

fig.show()
In [8]:
# Combine the two dataframes using a outer join 
# This allows for all the information to be available per day on row level

df_traffic_data_total = pd.merge(df_traffic_data_autobahn_total, df_traffic_data_bundesstrassen_total, how='left', on = 'Datum')
In [9]:
# Add the vehicle detections for highways and federal roads to get the total result

df_traffic_data_total['Total'] = df_traffic_data_total['Autobahn_KFZ_Total'] + df_traffic_data_total['Bundesstrassen_KFZ_Total']
df_traffic_data_total['Total_Rolling_Average_7_Days'] = df_traffic_data_total['Autobahn_KFZ_Total_Rolling_Average_7_Days'] + df_traffic_data_total['Bundesstrassen_KFZ_Total_Rolling_Average_7_Days']

# Show total dataframe
df_traffic_data_total
Out[9]:
Autobahn_KFZ_Total Autobahn_KFZ_Total_Rolling_Average_7_Days Bundesstrassen_KFZ_Total Bundesstrassen_KFZ_Total_Rolling_Average_7_Days Total Total_Rolling_Average_7_Days
Datum
2017-01-01 26975627 2.697563e+07 3886847 3.886847e+06 30862474 3.086247e+07
2017-01-02 41560509 3.426807e+07 7362853 5.624850e+06 48923362 3.989292e+07
2017-01-03 39117647 3.588459e+07 7740331 6.330010e+06 46857978 4.221460e+07
2017-01-04 36828052 3.612046e+07 7537968 6.632000e+06 44366020 4.275246e+07
2017-01-05 39337185 3.676380e+07 8039175 6.913435e+06 47376360 4.367724e+07
... ... ... ... ... ... ...
2021-12-27 42884833 4.293691e+07 7923371 7.957952e+06 50808204 5.089486e+07
2021-12-28 41452599 4.184089e+07 8050999 7.596847e+06 49503598 4.943774e+07
2021-12-29 41882438 4.037421e+07 8310961 7.272411e+06 50193399 4.764662e+07
2021-12-30 43390035 3.904579e+07 8555059 7.103476e+06 51945094 4.614927e+07
2021-12-31 26765605 3.820287e+07 5568421 7.044972e+06 32334026 4.524784e+07

1826 rows × 6 columns

In [10]:
# Write dataframe to the output file

df_traffic_data_total.to_csv('traffic data output/traffic data out.csv')